{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "基本原理与离散动作的情形相同，连续动作的概率用高斯（正态）概率密度函数计算即可。（本例使用钟摆环境 Pendulum-v1）"
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-04T12:49:12.771435Z",
     "start_time": "2025-01-04T12:49:12.705145Z"
    }
   },
   "source": [
    "import gym\n",
    "\n",
    "\n",
    "#定义环境\n",
    "class MyWrapper(gym.Wrapper):\n",
    "    \"\"\"Pendulum-v1 wrapper exposing a simplified reset/step interface.\n",
    "\n",
    "    reset() returns only the observation, and step() returns a\n",
    "    (state, reward, over) triple instead of the five values returned by\n",
    "    the wrapped environment.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        pendulum = gym.make('Pendulum-v1', render_mode='rgb_array')\n",
    "        super().__init__(pendulum)\n",
    "        self.env = pendulum\n",
    "        # Number of steps taken in the current episode.\n",
    "        self.step_n = 0\n",
    "\n",
    "    def reset(self):\n",
    "        \"\"\"Start a new episode and return the initial observation.\"\"\"\n",
    "        observation, _ = self.env.reset()\n",
    "        self.step_n = 0\n",
    "        return observation\n",
    "\n",
    "    def step(self, action):\n",
    "        \"\"\"Apply a scalar action and return (state, reward, over).\n",
    "\n",
    "        The action is multiplied by 2 and passed to the wrapped\n",
    "        environment as a one-element list.\n",
    "        \"\"\"\n",
    "        scaled_action = [action * 2]\n",
    "        state, raw_reward, terminated, truncated, _info = self.env.step(\n",
    "            scaled_action)\n",
    "\n",
    "        # Shift and rescale the reward to make training easier.\n",
    "        reward = (raw_reward + 8) / 8\n",
    "\n",
    "        # Cap the episode length at 200 steps.\n",
    "        self.step_n += 1\n",
    "        over = True if self.step_n >= 200 else (terminated or truncated)\n",
    "\n",
    "        return state, reward, over\n",
    "\n",
    "    def show(self):\n",
    "        \"\"\"Render the current frame and display it with matplotlib.\"\"\"\n",
    "        from matplotlib import pyplot as plt\n",
    "        plt.figure(figsize=(3, 3))\n",
    "        frame = self.env.render()\n",
    "        plt.imshow(frame)\n",
    "        plt.show()\n",
    "\n",
    "\n",
    "env = MyWrapper()\n",
    "\n",
    "env.reset()\n",
    "\n",
    "env.show()"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ],
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdZklEQVR4nO3df3BTZaI+8OekTdKfSSnQhNpWegcUevmxUrBk3VlmpVK1q7LgXWXR7SIXFYPDD4e7dldx1tm55eqdVVQEdxzBuS7UKW5REdBasKyXUCBQLQUrfhdpljYptDQphSZt837/YHuukYI9bdO3oc9n5syQ875v8wTNQ3JOeqIIIQSIiAaZTnYAIhqeWD5EJAXLh4ikYPkQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCSFtPJZv349xo4di5iYGOTk5ODgwYOyohCRBFLK591338WqVavw3HPP4ciRI5g6dSry8vLQ2NgoIw4RSaDI+MXSnJwczJgxA6+99hoAIBgMIj09HU8++SSefvrpH1wfDAZRX1+PxMREKIoS7rhE1EtCCLS2tiI1NRU63bVf20QPUiZVIBCA0+lEYWGhuk+n0yE3NxcOh6PHNX6/H36/X7195swZZGVlhT0rEfWNy+VCWlraNecMevmcO3cOXV1dsFgsIfstFgu++uqrHtcUFRXhD3/4wxX7XS4XTCZTWHISkXY+nw/p6elITEz8wbmDXj59UVhYiFWrVqm3ux+gyWRi+RANQb05HDLo5TNq1ChERUXB4/GE7Pd4PLBarT2uMRqNMBqNgxGPiAbJoJ/tMhgMyM7ORnl5ubovGAyivLwcNpttsOMQkSRS3natWrUKBQUFmD59Om699Va8/PLLaGtrw6JFi2TEISIJpJTPAw88gLNnz2LNmjVwu9340Y9+hN27d19xEJqIrl9SPufTXz6fD2azGV6vlweciYYQLc9N/m4XEUnB8iEiKVg+RCQFy4eIpGD5EJEULB8ikoLlQ0RSsHyISAqWDxFJwfIhIilYPkQkBcuHiKRg+RCRFCwfIpKC5UNEUrB8iEgKlg8RScHyISIpWD5EJAXLh4ikYPkQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCQFy4eIpGD5EJEULB8ikoLlQ0RSsHyISAqWDxFJwfIhIilYPkQkheby2bdvH+655x6kpqZCURRs3749ZFwIgTVr1mDMmDGIjY1Fbm4uTp48GTKnubkZCxcuhMlkQlJSEhYvXowLFy7064EQUWTRXD5tbW2YOnUq1q9f3+P4Cy+8gFdeeQUbN25EZWUl4uPjkZeXh/b2dnXOwoULUVNTg7KyMuzYsQP79u3Do48+2vdHQUSRR/QDAFFaWqreDgaDwmq1ihdffFHd19LSIoxGo9i6dasQQojjx48LAOLQoUPqnF27dglFUcSZM2d6db9er1cAEF6vtz/xiWiAaXluDugxn1OnTsHtdiM3N1fdZzabkZOTA4fDAQBwOBxISkrC9OnT1Tm5ubnQ6XSorKzs8ef6/X74fL6QjYgi24CWj9vtBgBYLJaQ/RaLRR1zu91ISUkJGY+OjkZycrI65/uKiopgNpvVLT09fSBjE5EEEXG2q7CwEF6vV91cLpfsSETUTwNaPlarFQDg8XhC9ns8HnXMarWisbExZLyzsxPNzc3qnO8zGo0wmUwhGxFFtgEtn8zMTFitVpSXl6v7fD4fKisrYbPZAAA2mw0tLS1wOp3qnD179iAYDCInJ2cg4xDREBatdcGFCxfwzTffqLdPnTqFqqoqJCcnIyMjAytWrMAf//hHjB8/HpmZmXj22WeRmpqKuXPnAgAmTpyIO++8E0uWLMHGjRvR0dGBZcuW4cEHH0RqauqAPTAiGuK0nkrbu3evAHDFVlBQIIS4fLr92WefFRaLRRiNRjF79mxRW1sb8jOamprEggULREJCgjCZTGLRokWitbW11xl4
qp1oaNLy3FSEEEJi9/WJz+eD2WyG1+vl8R+iIUTLczMiznYR0fWH5UNEUrB8iEgKlg8RScHyISIpWD5EJAXLh4ikYPkQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCSF5ouJ0fVBBIPwNzTAe+gQLrlc0On1iJ8wAaZbbkG0yQRFUWRHpOscy2cYEp2dOFdejoatW9HR3KzuP7t7N+LGjUP6kiWIv+kmFhCFFd92DTMiGERTRQX+8dZbIcUDAAgGcfHrr/H//vM/cf5//xcReJ05iiAsn2EmcO4c6v/yFwQvXbrqnM7z51H32mu4ePIkC4jChuUzzHgrK9HR1PSD87ouXsQ/Nm1C5/nzg5CKhiOWzzATDASAXr6auVBTgzPvvINgR0eYU9FwxPKha2retw9Nn34KEQzKjkLXGZbPMBM/YQJ0MTG9ni8CAZx55x1cOnWKx39oQLF8hpn4m26COTtb05qu1lacXr8eHU1NLCAaMCyfYUan1yNl7lzok5M1rbv4zTdoKCmB6OwMUzIablg+w1D8+PGw3n8/lGhtnzFtKivD2Z07efyHBgTLZxhSdDqMysvDqDlzNK0TnZ1wv/ce2k6eDFMyGk5YPsOUTq+H9Ze/RNz48ZrWdba04PSrryJw9myYktFwwfIZxvQjRuDGJ55AtNmsaV27y4WGd9+9/Jkhoj5i+QxjiqIg9l/+BakPPQTFYOj9QiFw7tNPefyH+oXlM8wpioKRt9+OkT/7mbaFwSDcpaVoq63l6XfqE5YPQafXI/VXv0LCv/6rpnWd58/j25dfRrvLxQIizVg+BACITkpC2iOPICoxUdM6f0MD3Nu2QfD4D2nE8iEAl99+xY0bh/R//3fojEZNa5srKlC/ZQuP/5AmLB9SKYqCET/5CUb85CfaFgqBc2VlaK2u5tsv6jWWD4XQ6fVIe+QRJE6Zomld14UL+Pbll+Gvr2cBUa9oKp+ioiLMmDEDiYmJSElJwdy5c1FbWxsyp729HXa7HSNHjkRCQgLmz58Pj8cTMqeurg75+fmIi4tDSkoKVq9ejU7+ztCQEZWQgNSHHkK0yaRpXUdT0+Xr/1zjKolE3TSVT0VFBex2Ow4cOICysjJ0dHRgzpw5aGtrU+esXLkSH374IUpKSlBRUYH6+nrMmzdPHe/q6kJ+fj4CgQD279+Pt99+G5s3b8aaNWsG7lFRvyiKgvibb0b6Y49pPv7Tsn8/3O+9B9HVFaZ0dL1QRD9eI589exYpKSmoqKjAT3/6U3i9XowePRpbtmzB/fffDwD46quvMHHiRDgcDsycORO7du3Cz3/+c9TX18NisQAANm7ciN/+9rc4e/YsDL34sJvP54PZbIbX64VJ47/O1HvBjg7UbdyIpk8/7fXVDwEgKj4eY1euhHnGDH4DxjCj5bnZr2M+Xq8XAJD8z8szOJ1OdHR0IDc3V50zYcIEZGRkwOFwAAAcDgcmT56sFg8A5OXlwefzoaampsf78fv98Pl8IRuFn06vR9qiRUicPFnTuq62Nrj+/Gf43W4e/6Gr6nP5BINBrFixArfddhsmTZoEAHC73TAYDEhKSgqZa7FY4Ha71TnfLZ7u8e6xnhQVFcFsNqtbenp6X2OTRlHx8Uh96CHN1/8JNDbC9ec/o+vixTAlo0jX5/Kx2+04duwYiouLBzJPjwoLC+H1etXN5XKF/T7psu7jP2mLFwM6bf+7+JxONH7wAY//UI/6VD7Lli3Djh07sHfvXqSlpan7rVYrAoEAWlpaQuZ7PB5YrVZ1zvfPfnXf7p7zfUajESaTKWSjwaMoCpJycjD67rsBjcdwPKWlOO9w8O0XXUFT+QghsGzZMpSWlmLPnj3IzMwMGc/OzoZer0d5ebm6r7a2FnV1dbDZbAAAm82G6upqNDY2qnPKyspgMpmQlZXVn8dCYaQzGHDDQw8hXuP1f4Lt7aj/n/+B/8yZMCWjSKWpfOx2O9555x1s2bIFiYmJcLvdcLvduPTP
z3WYzWYsXrwYq1atwt69e+F0OrFo0SLYbDbMnDkTADBnzhxkZWXh4YcfxhdffIGPP/4YzzzzDOx2O4waT+vS4NLFxiLDbofhKq9Qr8bf0IC6N95AZ2trmJJRJNJ0qv1qp003bdqE3/zmNwAuf8jwqaeewtatW+H3+5GXl4fXX3895C3V6dOnsXTpUnz22WeIj49HQUEB1q5di+heXlOYp9rlEULAe/gwTv33f2v7MKFOB+u//RtSH3wQSlRU+AKSVFqem/36nI8sLB+5RFcX6rduhXvbNkDDL5PqjEakLVmCUXfcwc//XKcG7XM+NDwpUVGwzpsH07RpmtYF/X7Uv/MO2v/xDx6AJpYP9U1UXBzSFi2CMTVV07rOlhacXrcOHc3NYUpGkYLlQ30Wk5aGtEWLoOj1mta1ff01PNu38wsIhzmWD/WZoigwT5+OMb/8JaDxIPLZjz5C486dfPs1jLF8qF+UqCik3HsvTBqv/yM6O+EpLcWlv/89TMloqGP5UL/pYmKQ8cQTiMnI0LSuo6kJ365bh0BzM18BDUMsH+o3RVFgSElB6sKFmq//c+n06csXoOfxn2GH5UMDovv3v8YsWKDtQ4RC4NyuXTj3ySe8AP0ww/KhAaPodBidl6f5+s+iqwvubdtw8Ztv+PZrGGH50ICKio/v8/Gf0+vXo/N7V0Sg6xfLhwacISUFqb/6FXRxcZrWXfr2W9T/5S8I8gsIhwWWDw04RVGQNHMmxjzwgLaF//z+L/d77yHIA9DXPZYPhUX38R9zTs5V5wghcN7vx+Fz53DS50NQCEAIeEpL4TtyhMd/rnMsHwobXWwsMh57DMYxY64YE0Kgrq0Nyw8ehP3AATy2fz+KT51ClxAItrfDs307gu3tElLTYGH5UNgoigL9yJFIf/RRRCUmhowJAP9VXY3jLS3oEgK+jg68duIEjp0/DwC4+Pe/o4tfPnhdY/lQWCmKAtMtt8By771XXP/Z19ERcjsQDMLPi80PGywfCjtFp0PKvfciedYstYAUAD+zWhH9nUK6yWTCjQkJAIAooxGKxm/LoMjSu+uWEvWTLiYGqQsX4kJNDQJnz0JRFBSMG4dEvR6fNjRgTGwsltx0E1JiYgAAI2bN0vxd8RRZeBlVGjRCCLSdOAHP++/D63RCBAIQQkDg8ish4PLbtISsLGT+x3/AoPGLCkk+Lc9NvvKhQdNdLHHjx+Ps7t3wvPceOs6fh/LPf/8UvR6mW25B2uLFLJ5hgOVDg06n1yMlPx/madPgO3oUfrcbuthYJEyciISJExEVGys7Ig0Clg9Joeh0iLnhBsTccIPsKCQJTycQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCQFy4eIpGD5EJEULB8ikoLlQ0RSsHyISAqWDxFJwfIhIik0lc+GDRswZcoUmEwmmEwm2Gw27Nq1Sx1vb2+H3W7HyJEjkZCQgPnz58Pj8YT8jLq6OuTn5yMuLg4pKSlYvXo1OvkFcUTDjqbySUtLw9q1a+F0OnH48GHcfvvtuO+++1BTUwMAWLlyJT788EOUlJSgoqIC9fX1mDdvnrq+q6sL+fn5CAQC2L9/P95++21s3rwZa9asGdhHRURDn+inESNGiDfffFO0tLQIvV4vSkpK1LETJ04IAMLhcAghhNi5c6fQ6XTC7XarczZs2CBMJpPw+/1XvY/29nbh9XrVzeVyCQDC6/X2Nz4RDSCv19vr52afj/l0dXWhuLgYbW1tsNlscDqd6OjoQG5urjpnwoQJyMjIgMPhAAA4HA5MnjwZFotFnZOXlwefz6e+eupJUVERzGazuqWnp/c1NhENEZrLp7q6GgkJCTAajXj88cdRWlqKrKwsuN1uGAwGJCUlhcy3WCxwu90AALfbHVI83ePdY1dTWFgIr9erbi6XS2tsIhpiNF/D+eabb0ZVVRW8Xi+2bduGgoICVFRUhCObymg0wmg0hvU+iGhwaS4fg8GAcePGAQCys7Nx
6NAhrFu3Dg888AACgQBaWlpCXv14PB5YrVYAgNVqxcGDB0N+XvfZsO45RDQ89PtzPsFgEH6/H9nZ2dDr9SgvL1fHamtrUVdXB5vNBgCw2Wyorq5GY2OjOqesrAwmkwlZWVn9jUJEEUTTK5/CwkLcddddyMjIQGtrK7Zs2YLPPvsMH3/8McxmMxYvXoxVq1YhOTkZJpMJTz75JGw2G2bOnAkAmDNnDrKysvDwww/jhRdegNvtxjPPPAO73c63VUTDjKbyaWxsxK9//Ws0NDTAbDZjypQp+Pjjj3HHHXcAAF566SXodDrMnz8ffr8feXl5eP3119X1UVFR2LFjB5YuXQqbzYb4+HgUFBTg+eefH9hHRURDHr+rnYgGjJbnJn+3i4ikYPkQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCQFy4eIpGD5EJEULB8ikoLlQ0RSsHyISAqWDxFJwfIhIilYPkQkBcuHiKRg+RCRFCwfIpKC5UNEUrB8iEgKlg8RScHyISIpWD5EJAXLh4ikYPkQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCQFy4eIpGD5EJEULB8ikqJf5bN27VooioIVK1ao+9rb22G32zFy5EgkJCRg/vz58Hg8Ievq6uqQn5+PuLg4pKSkYPXq1ejs7OxPFCKKMH0un0OHDuGNN97AlClTQvavXLkSH374IUpKSlBRUYH6+nrMmzdPHe/q6kJ+fj4CgQD279+Pt99+G5s3b8aaNWv6/iiIKPKIPmhtbRXjx48XZWVlYtasWWL58uVCCCFaWlqEXq8XJSUl6twTJ04IAMLhcAghhNi5c6fQ6XTC7XarczZs2CBMJpPw+/093l97e7vwer3q5nK5BADh9Xr7Ep+IwsTr9fb6udmnVz52ux35+fnIzc0N2e90OtHR0RGyf8KECcjIyIDD4QAAOBwOTJ48GRaLRZ2Tl5cHn8+HmpqaHu+vqKgIZrNZ3dLT0/sSm4iGEM3lU1xcjCNHjqCoqOiKMbfbDYPBgKSkpJD9FosFbrdbnfPd4uke7x7rSWFhIbxer7q5XC6tsYloiInWMtnlcmH58uUoKytDTExMuDJdwWg0wmg0Dtr9EVH4aXrl43Q60djYiGnTpiE6OhrR0dGoqKjAK6+8gujoaFgsFgQCAbS0tISs83g8sFqtAACr1XrF2a/u291ziOj6p6l8Zs+ejerqalRVVanb9OnTsXDhQvXPer0e5eXl6pra2lrU1dXBZrMBAGw2G6qrq9HY2KjOKSsrg8lkQlZW1gA9LCIa6jS97UpMTMSkSZNC9sXHx2PkyJHq/sWLF2PVqlVITk6GyWTCk08+CZvNhpkzZwIA5syZg6ysLDz88MN44YUX4Ha78cwzz8But/OtFdEwoql8euOll16CTqfD/Pnz4ff7kZeXh9dff10dj4qKwo4dO7B06VLYbDbEx8ejoKAAzz///EBHIaIhTBFCCNkhtPL5fDCbzfB6vTCZTLLjENE/aXlu8ne7iEgKlg8RScHyISIpWD5EJAXLh4ikYPkQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCQFy4eIpGD5EJEULB8ikoLlQ0RSsHyISAqWDxFJwfIhIilYPkQkBcuHiKRg+RCRFCwfIpKC5UNEUrB8iEgKlg8RScHyISIpWD5EJAXLh4ikYPkQkRQsHyKSguVDRFKwfIhICpYPEUnB8iEiKVg+RCQFy4eIpIiWHaAvhBAAAJ/PJzkJEX1X93Oy+zl6LRFZPk1NTQCA9PR0yUmIqCetra0wm83XnBOR5ZOcnAwAqKur+8EHONT4fD6kp6fD5XLBZDLJjtNrzD24IjW3EAKtra1ITU39wbkRWT463eVDVWazOaL+w3yXyWSKyOzMPbgiMXdvXxDwgDMRScHyISIpIrJ8jEYjnnvuORiNRtlRNIvU7Mw9uCI1txaK6M05MSKiARaRr3yIKPKxfIhICpYPEUnB8iEi
KVg+RCRFRJbP+vXrMXbsWMTExCAnJwcHDx6Ummffvn245557kJqaCkVRsH379pBxIQTWrFmDMWPGIDY2Frm5uTh58mTInObmZixcuBAmkwlJSUlYvHgxLly4ENbcRUVFmDFjBhITE5GSkoK5c+eitrY2ZE57ezvsdjtGjhyJhIQEzJ8/Hx6PJ2ROXV0d8vPzERcXh5SUFKxevRqdnZ1hy71hwwZMmTJF/fSvzWbDrl27hnTmnqxduxaKomDFihURl31AiAhTXFwsDAaDeOutt0RNTY1YsmSJSEpKEh6PR1qmnTt3it///vfir3/9qwAgSktLQ8bXrl0rzGaz2L59u/jiiy/EvffeKzIzM8WlS5fUOXfeeaeYOnWqOHDggPjb3/4mxo0bJxYsWBDW3Hl5eWLTpk3i2LFjoqqqStx9990iIyNDXLhwQZ3z+OOPi/T0dFFeXi4OHz4sZs6cKX784x+r452dnWLSpEkiNzdXHD16VOzcuVOMGjVKFBYWhi33Bx98ID766CPx9ddfi9raWvG73/1O6PV6cezYsSGb+fsOHjwoxo4dK6ZMmSKWL1+u7o+E7AMl4srn1ltvFXa7Xb3d1dUlUlNTRVFRkcRU/+f75RMMBoXVahUvvviiuq+lpUUYjUaxdetWIYQQx48fFwDEoUOH1Dm7du0SiqKIM2fODFr2xsZGAUBUVFSoOfV6vSgpKVHnnDhxQgAQDodDCHG5eHU6nXC73eqcDRs2CJPJJPx+/6BlHzFihHjzzTcjInNra6sYP368KCsrE7NmzVLLJxKyD6SIetsVCATgdDqRm5ur7tPpdMjNzYXD4ZCY7OpOnToFt9sdktlsNiMnJ0fN7HA4kJSUhOnTp6tzcnNzodPpUFlZOWhZvV4vgP+7aoDT6URHR0dI9gkTJiAjIyMk++TJk2GxWNQ5eXl58Pl8qKmpCXvmrq4uFBcXo62tDTabLSIy2+125Ofnh2QEIuPveyBF1G+1nzt3Dl1dXSF/8QBgsVjw1VdfSUp1bW63GwB6zNw95na7kZKSEjIeHR2N5ORkdU64BYNBrFixArfddhsmTZqk5jIYDEhKSrpm9p4eW/dYuFRXV8Nms6G9vR0JCQkoLS1FVlYWqqqqhmxmACguLsaRI0dw6NChK8aG8t93OERU+VD42O12HDt2DJ9//rnsKL1y8803o6qqCl6vF9u2bUNBQQEqKipkx7oml8uF5cuXo6ysDDExMbLjSBdRb7tGjRqFqKioK47+ezweWK1WSamurTvXtTJbrVY0NjaGjHd2dqK5uXlQHteyZcuwY8cO7N27F2lpaep+q9WKQCCAlpaWa2bv6bF1j4WLwWDAuHHjkJ2djaKiIkydOhXr1q0b0pmdTicaGxsxbdo0REdHIzo6GhUVFXjllVcQHR0Ni8UyZLOHQ0SVj8FgQHZ2NsrLy9V9wWAQ5eXlsNlsEpNdXWZmJqxWa0hmn8+HyspKNbPNZkNLSwucTqc6Z8+ePQgGg8jJyQlbNiEEli1bhtLSUuzZsweZmZkh49nZ2dDr9SHZa2trUVdXF5K9uro6pDzLyspgMpmQlZUVtuzfFwwG4ff7h3Tm2bNno7q6GlVVVeo2ffp0LFy4UP3zUM0eFrKPeGtVXFwsjEaj2Lx5szh+/Lh49NFHRVJSUsjR/8HW2toqjh49Ko4ePSoAiD/96U/i6NGj4vTp00KIy6fak5KSxPvvvy++/PJLcd999/V4qv2WW24RlZWV4vPPPxfjx48P+6n2pUuXCrPZLD777DPR0NCgbhcvXlTnPP744yIjI0Ps2bNHHD58WNhsNmGz2dTx7lO/c+bMEVVVVWL37t1i9OjRYT31+/TTT4uKigpx6tQp8eWXX4qnn35aKIoiPvnkkyGb+Wq+e7Yr0rL3V8SVjxBCvPrqqyIjI0MYDAZx6623igMHDkjNs3fvXgHgiq2goEAIcfl0+7PPPissFoswGo1i9uzZora2NuRnNDU1iQULFoiEhARh
MpnEokWLRGtra1hz95QZgNi0aZM659KlS+KJJ54QI0aMEHFxceIXv/iFaGhoCPk53377rbjrrrtEbGysGDVqlHjqqadER0dH2HI/8sgj4sYbbxQGg0GMHj1azJ49Wy2eoZr5ar5fPpGUvb94PR8ikiKijvkQ0fWD5UNEUrB8iEgKlg8RScHyISIpWD5EJAXLh4ikYPkQkRQsHyKSguVDRFKwfIhIiv8PaSlPagjsWHIAAAAASUVORK5CYII="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-04T14:43:15.786985Z",
     "start_time": "2025-01-04T14:43:15.599180Z"
    }
   },
   "source": [
    "import torch\n",
    "# 定义连续动作空间下的模型\n",
    "# 相比于离散动作，这里模型需要输出动作的均值 (mu) 和标准差 (sigma)\n",
    "class Model(torch.nn.Module):\n",
    "    \"\"\"Gaussian policy network for a 3-feature state and a scalar action.\n",
    "\n",
    "    forward() returns the mean and the standard deviation of the action\n",
    "    distribution, one value of each per input row.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        hidden = 64\n",
    "\n",
    "        # Shared trunk: 3 state features -> hidden features.\n",
    "        trunk_layers = [\n",
    "            torch.nn.Linear(3, hidden),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(hidden, hidden),\n",
    "            torch.nn.ReLU(),\n",
    "        ]\n",
    "        self.s = torch.nn.Sequential(*trunk_layers)\n",
    "\n",
    "        # Mean head: tanh keeps the mean inside (-1, 1).\n",
    "        self.mu = torch.nn.Sequential(\n",
    "            torch.nn.Linear(hidden, 1),\n",
    "            torch.nn.Tanh(),\n",
    "        )\n",
    "\n",
    "        # Log-std head: tanh bounds the value to (-1, 1); forward() then\n",
    "        # exponentiates it, so the standard deviation is always positive.\n",
    "        self.sigma = torch.nn.Sequential(\n",
    "            torch.nn.Linear(hidden, 1),\n",
    "            torch.nn.Tanh(),\n",
    "        )\n",
    "\n",
    "    def forward(self, state):\n",
    "        \"\"\"Return (mu, sigma) for a batch of states.\"\"\"\n",
    "        features = self.s(state)\n",
    "        mean = self.mu(features)\n",
    "        std = torch.exp(self.sigma(features))\n",
    "        return mean, std\n",
    "\n",
    "# Policy network: outputs the mean (mu) and standard deviation (sigma) of a continuous action\n",
    "model_action = Model()\n",
    "\n",
    "# Value network\n",
    "# Estimates the state value V(s)\n",
    "model_value = torch.nn.Sequential(\n",
    "    torch.nn.Linear(3, 64),  # 3 state features in, 64 hidden features out\n",
    "    torch.nn.ReLU(),         # ReLU activation\n",
    "    torch.nn.Linear(64, 64), # second hidden layer\n",
    "    torch.nn.ReLU(),         # ReLU activation\n",
    "    torch.nn.Linear(64, 1),  # single output: the state value V(s)\n",
    ")\n",
    "\n",
    "# Smoke test:\n",
    "# feed random state batches (2 samples, 3 features each)\n",
    "# Outputs:\n",
    "# - policy network output (mu and sigma)\n",
    "# - value network output (state value V(s))\n",
    "model_action(torch.randn(2, 3)), model_value(torch.randn(2, 3))\n",
    "\n",
    "\n"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((tensor([[-0.2066],\n",
       "          [-0.1128]], grad_fn=<TanhBackward0>),\n",
       "  tensor([[1.2154],\n",
       "          [1.2643]], grad_fn=<ExpBackward0>)),\n",
       " tensor([[ 0.0111],\n",
       "         [-0.0973]], grad_fn=<AddmmBackward0>))"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-04T11:39:27.918934Z",
     "start_time": "2025-01-04T11:39:27.883942Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\app\\anaconda3\\install\\envs\\pytorch_env_3_8\\lib\\site-packages\\gym\\utils\\passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
      "  if not isinstance(terminated, (bool, np.bool8)):\n",
      "C:\\Users\\25036\\AppData\\Local\\Temp\\ipykernel_47120\\3994207745.py:34: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\torch\\csrc\\utils\\tensor_new.cpp:281.)\n",
      "  state = torch.FloatTensor(state).reshape(-1, 3)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "65.38080596923828"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython import display\n",
    "import random\n",
    "\n",
    "\n",
    "#玩一局游戏并记录数据\n",
    "def play(show=False):\n",
    "    \"\"\"Play one episode with the current policy and record the transitions.\n",
    "\n",
    "    Args:\n",
    "        show: if True, redraw the rendered frame after every step.\n",
    "\n",
    "    Returns:\n",
    "        A tuple (state, action, reward, next_state, over, reward_sum):\n",
    "        state and next_state are float tensors of shape (T, 3); action and\n",
    "        reward are float tensors of shape (T, 1); over is a long tensor of\n",
    "        shape (T, 1); reward_sum is the summed episode reward as a float.\n",
    "        T is the number of steps played.\n",
    "    \"\"\"\n",
    "    state = []\n",
    "    action = []\n",
    "    reward = []\n",
    "    next_state = []\n",
    "    over = []\n",
    "\n",
    "    # Each observation is converted to a tensor once, so the final batches\n",
    "    # can be built with torch.stack instead of the slow list-of-ndarrays\n",
    "    # path of torch.FloatTensor.\n",
    "    s = torch.tensor(env.reset(), dtype=torch.float32)\n",
    "    o = False\n",
    "    while not o:\n",
    "        # The policy defines a normal distribution N(mu, sigma**2): mu is the\n",
    "        # preferred action (exploitation), sigma adds randomness (exploration).\n",
    "        # No gradients are needed while collecting data.\n",
    "        with torch.no_grad():\n",
    "            mu, sigma = model_action(s.reshape(1, 3))\n",
    "        a = random.normalvariate(mu=mu.item(), sigma=sigma.item())\n",
    "\n",
    "        ns, r, o = env.step(a)\n",
    "        ns = torch.tensor(ns, dtype=torch.float32)\n",
    "\n",
    "        state.append(s)\n",
    "        action.append(a)\n",
    "        reward.append(r)\n",
    "        next_state.append(ns)\n",
    "        over.append(o)\n",
    "\n",
    "        s = ns\n",
    "\n",
    "        if show:\n",
    "            display.clear_output(wait=True)\n",
    "            env.show()\n",
    "\n",
    "    state = torch.stack(state).reshape(-1, 3)\n",
    "    action = torch.FloatTensor(action).reshape(-1, 1)\n",
    "    reward = torch.FloatTensor(reward).reshape(-1, 1)\n",
    "    next_state = torch.stack(next_state).reshape(-1, 3)\n",
    "    over = torch.LongTensor(over).reshape(-1, 1)\n",
    "\n",
    "    return state, action, reward, next_state, over, reward.sum().item()\n",
    "\n",
    "\n",
    "state, action, reward, next_state, over, reward_sum = play()\n",
    "\n",
    "reward_sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-04T11:39:28.685995Z",
     "start_time": "2025-01-04T11:39:27.960211Z"
    }
   },
   "outputs": [],
   "source": [
    "optimizer_action = torch.optim.Adam(model_action.parameters(), lr=5e-4)\n",
    "optimizer_value = torch.optim.Adam(model_value.parameters(), lr=5e-3)\n",
    "\n",
    "\n",
    "def requires_grad(model, value):\n",
    "    \"\"\"Enable (value=True) or disable (value=False) gradients for every parameter of model.\"\"\"\n",
    "    model.requires_grad_(value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-04T11:39:28.862707Z",
     "start_time": "2025-01-04T11:39:28.701833Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([200, 1])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_value(state, reward, next_state, over):\n",
    "    \"\"\"Fit the value network on one batch and return the TD residuals.\n",
    "\n",
    "    The target is reward + 0.98 * V(next_state), with the bootstrap term\n",
    "    zeroed where over == 1. The network is updated 10 times against this\n",
    "    fixed target.\n",
    "\n",
    "    Returns:\n",
    "        Detached tensor target - V(state), where V(state) is the prediction\n",
    "        made in the last of the 10 passes (before that pass's optimizer step).\n",
    "    \"\"\"\n",
    "    # Only the value network is trained here.\n",
    "    requires_grad(model_action, False)\n",
    "    requires_grad(model_value, True)\n",
    "\n",
    "    # The target is computed once, without gradients, and then held fixed.\n",
    "    with torch.no_grad():\n",
    "        next_value = model_value(next_state)\n",
    "    target = reward + next_value * 0.98 * (1 - over)\n",
    "\n",
    "    # Train on the same batch 10 times.\n",
    "    for _ in range(10):\n",
    "        value = model_value(state)\n",
    "        loss = torch.nn.functional.mse_loss(input=value, target=target)\n",
    "\n",
    "        loss.backward()\n",
    "        optimizer_value.step()\n",
    "        optimizer_value.zero_grad()\n",
    "\n",
    "    # Subtracting the predicted value acts as a baseline.\n",
    "    residual = target - value\n",
    "    return residual.detach()\n",
    "\n",
    "\n",
    "value = train_value(state, reward, next_state, over)\n",
    "\n",
    "value.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-04T11:39:29.106097Z",
     "start_time": "2025-01-04T11:39:28.950267Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.37152689695358276"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_action(state, action, value):\n",
    "    \"\"\"Update the policy network with the clipped surrogate objective.\n",
    "\n",
    "    Args:\n",
    "        state: float tensor of states, one row of 3 features per step.\n",
    "        action: float tensor of the actions taken, one per step.\n",
    "        value: per-step residuals in time order (train() passes the output\n",
    "            of train_value here); any shape with one element per step.\n",
    "\n",
    "    Returns:\n",
    "        The loss of the last of the 10 optimisation passes, as a float.\n",
    "    \"\"\"\n",
    "    requires_grad(model_action, True)\n",
    "    requires_grad(model_value, False)\n",
    "\n",
    "    # Advantage estimate: discounted sum of the per-step residuals,\n",
    "    #   delta[i] = sum over j >= i of value[j] * (0.9 * 0.9) ** (j - i).\n",
    "    # It is accumulated backwards in O(T) with the recursion\n",
    "    #   delta[i] = value[i] + (0.9 * 0.9) * delta[i + 1]\n",
    "    # instead of re-summing the tail for every i.\n",
    "    decay = 0.9 * 0.9\n",
    "    running = 0.0\n",
    "    delta = []\n",
    "    for v in reversed(value.detach().reshape(-1).tolist()):\n",
    "        running = v + decay * running\n",
    "        delta.append(running)\n",
    "    delta.reverse()\n",
    "    delta = torch.FloatTensor(delta).reshape(-1, 1)\n",
    "\n",
    "    # Log-probability of the taken actions under the policy before the update.\n",
    "    # Continuous actions use the normal density rather than a softmax output.\n",
    "    with torch.no_grad():\n",
    "        mu, sigma = model_action(state)\n",
    "        log_prob_old = torch.distributions.Normal(mu, sigma).log_prob(action)\n",
    "\n",
    "    # Train on the same batch 10 times.\n",
    "    for _ in range(10):\n",
    "        mu, sigma = model_action(state)\n",
    "        log_prob_new = torch.distributions.Normal(mu, sigma).log_prob(action)\n",
    "\n",
    "        # Probability ratio new / old, formed in log space so that densities\n",
    "        # which underflow to 0 cannot produce 0 / 0 = NaN or x / 0 = inf.\n",
    "        ratio = (log_prob_new - log_prob_old).exp()\n",
    "\n",
    "        # Clipped and unclipped surrogate; keep the smaller of the two.\n",
    "        surr1 = ratio * delta\n",
    "        surr2 = ratio.clamp(0.8, 1.2) * delta\n",
    "\n",
    "        loss = -torch.min(surr1, surr2).mean()\n",
    "\n",
    "        loss.backward()\n",
    "        optimizer_action.step()\n",
    "        optimizer_action.zero_grad()\n",
    "\n",
    "    return loss.item()\n",
    "\n",
    "\n",
    "train_action(state, action, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-04T11:41:58.886866Z",
     "start_time": "2025-01-04T11:39:29.172275Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 -0.008641012944281101 40.58622150421142\n",
      "100 -0.05295982211828232 57.10105381011963\n",
      "200 -1.0816595554351807 86.14474143981934\n",
      "300 -0.008843365125358105 131.43670158386232\n",
      "400 0.05213010683655739 169.26545295715331\n",
      "500 0.3709363639354706 91.77202796936035\n",
      "600 0.48534226417541504 152.39863090515138\n",
      "700 1.7661621570587158 152.8100845336914\n",
      "800 0.040884390473365784 164.9548194885254\n",
      "900 0.48889076709747314 130.95990447998048\n"
     ]
    }
   ],
   "source": [
    "def train():\n",
    "    \"\"\"Run 1000 training epochs, printing progress every 100 epochs.\n",
    "\n",
    "    Each epoch plays episodes until at least 200 steps have been collected\n",
    "    and updates both networks after every episode. Every 100th epoch prints\n",
    "    the epoch index, the latest policy loss and the mean reward sum over 20\n",
    "    further episodes.\n",
    "    \"\"\"\n",
    "    model_action.train()\n",
    "    model_value.train()\n",
    "\n",
    "    for epoch in range(1000):\n",
    "        # Play at least 200 steps per epoch.\n",
    "        collected = 0\n",
    "        while collected < 200:\n",
    "            state, action, reward, next_state, over, _ = play()\n",
    "            collected = collected + len(state)\n",
    "\n",
    "            # Value update first: it yields the residuals used by the policy update.\n",
    "            delta = train_value(state, reward, next_state, over)\n",
    "            loss = train_action(state, action, delta)\n",
    "\n",
    "        if epoch % 100 != 0:\n",
    "            continue\n",
    "\n",
    "        episode_rewards = [play()[-1] for _ in range(20)]\n",
    "        test_result = sum(episode_rewards) / 20\n",
    "        print(epoch, loss, test_result)\n",
    "\n",
    "\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": "play(True)[-1]",
   "execution_count": 18,
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ],
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAltElEQVR4nO3de3RTZb4+8GenSdPSNkkvNAFpuQworVxGAWmU5WUoVKmAB5xxPAxWZDmjU1wgo0uKijOuOdbBc8TRkXLm6AzjnMPlwFhkWpCpBQpIuZVWy63ib5B2KEmB0qSUNm2a7+8PIcdAgaRNu1Pm+ay11zL7fffON2Ae3r139rsVEREQEfUwjdoFENE/J4YPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqUK18Hn//fcxaNAgREREYPz48di3b59apRCRClQJn7Vr12LhwoV47bXXcPDgQYwePRoZGRmoq6tToxwiUoGixo2l48ePx7hx4/C73/0OAODxeJCUlITnnnsOixYtuuH2Ho8HtbW1iImJgaIo3V0uEflJRNDY2Ij+/ftDo7n+2EbbQzV5tba2oqysDDk5Od51Go0G6enpKC0t7XAbl8sFl8vlfX3q1CmkpqZ2e61E1Dk1NTUYMGDAdfv0ePicPXsW7e3tMJvNPuvNZjOOHTvW4Ta5ubn41a9+ddX6mpoaGAyGbqmTiALndDqRlJSEmJiYG/bt8fDpjJycHCxcuND7+vIHNBgMDB+iEOTP6ZAeD5+EhASEhYXBbrf7rLfb7bBYLB1uo9frodfre6I8IuohPX61Kzw8HGPGjEFxcbF3ncfjQXFxMaxWa0+XQ0QqUeWwa+HChcjKysLYsWNx11134Z133kFTUxPmzJmjRjlEpAJVwuexxx7DmTNnsGTJEthsNnz/+9/Hp59+etVJaCK6eanyO5+ucjqdMBqNcDgcPOFMFEIC+W7y3i4iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVQQcPjt27MDUqVPRv39/KIqCDRs2+LSLCJYsWYJ+/fohMjIS6enpOH78uE+f+vp6zJo1CwaDASaTCXPnzsWFCxe69EGIqHcJOHyampowevRovP/++x22L126FO+++y5WrFiBvXv3IioqChkZGWhpafH2mTVrFg4fPoyioiIUFBRgx44d+OlPf9r5T0FEvY90AQDJz8/3vvZ4PGKxWOStt97yrmtoaBC9Xi+rV68WEZEjR44IANm/f7+3z+bNm0VRFDl16pRf7+twOASAOByOrpRPREEWyHczqOd8Tpw4AZvNhvT0dO86o9GI8ePHo7S0FABQWloKk8mEsWPHevukp6dDo9Fg7969He7X5XLB6XT6LETUuwU1fGw2GwDAbDb7rDebzd42m82GxMREn3atVou4uDhvnyvl5ubCaDR6l6SkpGCWTUQq6BVXu3JycuBwOLxLTU2N2iURURcFNXwsFgsAwG63+6y32+3eNovFgrq6Op92t9uN+vp6b58r6fV6GAwGn4WIereghs/gwYNhsVhQXFzsXed0OrF3715YrVYAgNVqRUNDA8rKyrx9tm7dCo/Hg/HjxwezHCIKYdpAN7hw4QK+/vpr7+sTJ06goqICcXFxSE5OxoIFC/DrX/8aw4YNw+DBg/Hqq6+if//+eOSRRwAAKSkpePDBB/H0009jxYoVaGtrw7x58/DjH/8Y/fv3D9oHI6IQF+iltG3btgmAq5asrCwR+fZy+6uvvipms1n0er1MnDhRqqqqfPZx7tw5efzxxyU6
OloMBoPMmTNHGhsb/a6Bl9qJQlMg301FRETF7OsUp9MJo9EIh8PB8z9EISSQ72avuNpFRDcfhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpIqA7+2im5+IQNrb4bLZ4HY4oGi10JvN0BqNAABFUVSukG4GDB/yISJo/uYbnF67FhcOHYK7sRGKVotwsxkJkyah74MPIiwyUu0y6SbA8CEvEcHF48dxYtkyuE6d+r/1bW1w/eMfOPXRR2i12XDLk08ygKjLeM6HvNobG1Hz+9/7BI9vh3ac/dvfcGbzZkh7e88WRzcdhg95nd+zB03fmaupI9LejtNr16L5m296pii6aTF8yMtVWwt4PDfs52luhv2TT+Bxu3ugKrpZMXyoUxr27MHFr79GL5wOikIEw4e8dHFxgJ+X0T0tLbB/8gmEox/qJIYPeZnGjYNGr/e7v7OsDE3HjnH0Q53C8CEvXVwcYkaO9Lu/p6UF9vx8eFyubqyKblYMH/LS6PVInD4dSni439s4Dh7E+c8/5+iHAsbwIR/RKSkwjhnj/wYeD+o2bED7hQvdVxTdlBg+5EOj08E8fTo0ERF+b9N88iTqd+7k6IcCwvChq0TddhuM48YFtE1dQQHcDQ3dUxDdlBg+dDWNBonTpiEsKsrvTVz/+AfObd/O0Q/5jeFDV1EUBVFDh8KUlhbQdmc3b0ar3d5NVdHNhuFDHdNokDh1KnSxsX5v4rLZYPvLX3jTKfmF4UMdUhQFkYMGIeGhh/z+1TMAnN+1izedkl8YPnRNikaDhPR0hCcm+r1Ne1MT6v76V952QTfE8KHr0sXFISEjI7DRz549aPp//48nn+m6GD50XYpGg/gHHoC+Xz+/t/FcvIg63nRKN8DwoRvSxcWh74MPBjT6aSgtRcPevRz90DUxfOiGFEVB3H33IWLAAL+3kfb2b286bWnpxsqoN2P4kF+0JhMSH344oG0ufv01Gnbv7qaKqLdj+JBfFEVB7IQJiBo+3P+NRGDfuBHuxsbuK4x6LYYP+S0sOhr9fvSjgKbcaP7mG79uOvV4PHC73XC73Whvb4fH4/n24YU8Z3TTCih8cnNzMW7cOMTExCAxMRGPPPIIqqqqfPq0tLQgOzsb8fHxiI6OxsyZM2G/4if31dXVyMzMRJ8+fZCYmIgXX3wRbl4ZCXmKoiBm1KiAJhyDCM4UFsJ9/vx1u3366aewWq24//77MXPmTCxYsAB5eXnYsWMH6urq0NbWxiC6yQT00MCSkhJkZ2dj3LhxcLvdWLx4MSZPnowjR44g6tJNiM8//zwKCwuxbt06GI1GzJs3DzNmzMDnn38OAGhvb0dmZiYsFgt2796N06dP44knnoBOp8Mbb7wR/E9IQaVcmnLjwuHDfp9MbqmpgW3DBgx48kkomo7/vQsPD0d4eDgaGxtRXV2NHTt2oLW1FVqtFhaLBRMmTMC0adMwYcIEmEwmaK6xH+o9FOnCPydnzpxBYmIiSkpKcO+998LhcKBv375YtWoVHn30UQDAsWPHkJKSgtLSUqSlpWHz5s14+OGHUVtbC7PZDABYsWIFXnrpJZw5cwbhfgzpnU4njEYjHA4HDAZDZ8unTvK0teHEv/87GkpL/d5GazDg1txcRCYlddje1taGlpYWtLe3w+1248yZM/jqq69QXl6OXbt2obKyEs3Nzbjtttswe/ZsPProo7BYLAyhEBPId7NLf3MOhwMAEBcXBwAoKytDW1sb0tPTvX2GDx+O5ORklF76H7W0tBQjR470Bg8AZGRkwOl04vDhwx2+j8vlgtPp9FlIPYpWi8SpUwOacMztdOJMYeE1bzrV6XSIiYmByWRCQkICUlJSMG3aNCxZsgQbN25EUVERFi1aBJfLhZycHEydOhWrVq1CU1MTD8d6qU6Hj8fjwYIFC3DPPfdgxIgRAACbzYbw8HCYTCafvmazGTabzdvnu8Fzuf1yW0dyc3NhNBq9S9I1/vWknqEo
yrcTjo0dG9B29Tt2oPnkSb/DQlEUaDQa9OnTByNHjsSiRYtQVFSE3NxcNDU14Wc/+xnmzp2L48ePM4B6oU6HT3Z2Ng4dOoQ1a9YEs54O5eTkwOFweJeamppuf0+6PkWrhXnGDGiNRr+3ab9wAXUFBX49FfWq97sURImJiZg3bx4KCwsxe/ZsFBYWYubMmfjss8/Qzqk8epVOhc+8efNQUFCAbdu2YcB3fvVqsVjQ2tqKhium07Tb7bBYLN4+V179uvz6cp8r6fV6GAwGn4XUpSgK+gwZgvgf/CCg7RpKS9HUhZHK5RD63ve+h3feeQcrVqxAY2MjnnzySaxdu5ZXxXqRgMJHRDBv3jzk5+dj69atGDx4sE/7mDFjoNPpUFxc7F1XVVWF6upqWK1WAIDVakVlZSXq6uq8fYqKimAwGJCamtqVz0I9TNFokJCRAV18vN/btDc14dRHH0Ha2rr8/hEREXj88cexevVqWCwWPP/881i1ahVHQL1EQOGTnZ2N//7v/8aqVasQExMDm80Gm82G5uZmAIDRaMTcuXOxcOFCbNu2DWVlZZgzZw6sVivSLk3JOXnyZKSmpmL27Nn44osvsGXLFrzyyivIzs6GPoCnZVJo0Pfrh4TvXGDwx4UjR+DYvz8o76/RaJCWloaPPvoI3/ve9/DSSy/hk08+gacTh3bUswIKn7y8PDgcDtx///3o16+fd1m7dq23z7Jly/Dwww9j5syZuPfee2GxWPDxxx9728PCwlBQUICwsDBYrVb85Cc/wRNPPIHXX389eJ+KeoyiKIifNCmgCcfg8cCen4/2ixeDVkNqaipWrFgBs9mMF154Afv27ePhV4jr0u981MLf+YQWEYFt/XrU/vnP/m+kKBg0fz7iHngASgBTddyoju3bt2PWrFkYPHgw1q9fD4vFErT904312O98iIBvRx4JEyci/IqfUFzXpZtOg/mkU0VRcO+992Lx4sUoLy/HsmXLeNtOCGP4UFBoY2OROGVKQNv4e9NpIMLCwvDkk09iypQp+MMf/oDdu3fz8CtEMXwoKBRFQXx6emBTbng8sP3v/6L1O1c+gyE6OhqLFy+GXq/HO++8g6ampqDun4KD4UNBExYdDfP06VB0Or+3aauvx5nNmyFBvjo1evRo/OQnP8HWrVuxY8cOjn5CEMOHgkZRFBjuvBPRKSkBbXdu61a4amuDWotGo8GcOXNgNBqxcuVKuFyuoO6fuo7hQ0GliYhA4rRpULT+z9bibmhAXWFhUEc/iqJg6NChmDJlCkpKSlBVVcXRT4hh+FBQXZ5wLPrSzcb+Or9zZ0A3nfojLCwMjz76KFpbW7FlyxaGT4hh+JBfXC4XLl686NcXOCwiAuZHHgls9ON04tSf/gQE8dYIRVEwZswYDB48GFu2bOGhV4hh+NANiQhKSkqwZcsWv7eJGTECMd//fkDv0/jll3B+8UVQRygxMTGYMGECjh07xtkQQgzDh27I7XbjL3/5C/7nf/4HbX7eEKoJD4dlxoyArnyJ2w37xx/DE8QRSlhYGNLS0tDY2IgjR47w0CuEMHzohurq6rB9+3Z8/vnn+Prrr/3eLjolBQmTJgX0Xo2HD8N58GCgJV6ToigYMWIEwsLCrjlTJqmD4UPXJSIoLS1FdXU16urqUFhY6P/oQaNB34cfhvaKmS2vy+OBfcMGtAfxh4EWiwVxcXG84hViGD50XW1tbSgoKEBraysAID8/3zt3940oioKIW25B/AMPdNguIjjvcuHA2bM47nTCcykYmr76CueDeFtEZGQkLBYLampqGD4hhOFD12W327Fjxw7v6y+++ALl5eUBzcPc98EHobv0kIHLRATVTU2Yv28fsvfswc9278aaEyfQLgJ4PDi3bVvQnvOu1WoRHx/PBw+EGIYPXZOIYOfOnTh9+rR3XUtLC9avXx/QbIHhZjPiJ0703TeA31RW4khDA9pF4Gxrw++OHsWhSw8XvPj3v6P90iR1XRUWFoao
qCiOekIMw4euqa2tDYWFhejbty/69++Pfv36Yfz48SguLvYJpBvxTreakOCz3nnFlbNWjweubpoClc/3Cj38G6Frqq2tRWtrK/785z/jzjvvREJCAlauXImMjAzs2bMnoJFEeEICEiZPBi5N7KUAeMBigfY7E33dajBgYHQ0ACBMr7/m000DJSL8gWEICuhxyfTPJSwsDG+//TYsFgs+/PBDtLS0wGQyITc3FydPnoTH40FYWJhf+1I0GiRMmoT6rVvRevYs4HYja+hQxOh0+Oz0afSLjMTTt96KxEsPIoy97z5ogzRLpdvthsPhgC6A3xxR92P40DV997FIiYmJaGxs9D4Se3gg8/ZcoouLw9Bf/hINu3ejds0aaFtb8cNBg/DooEG4PP5RFAXRqanf3p4RpJFPa2sr6urqkJycHJT9UXDwsIuuSVEU7zJw4EA4HA6cOXPGZ32g+4vo3x+J06fjlqws6OLioGg00FzalyY8HMa77sLA+fMRfsXVsa5wOp2oq6vDkCFDOJ9zCOHIh/ySkpICt9uNo0eP4u677+7SvjQ6HRIzM2G88044y8vhstmgiYxEdEoKolNSEBYZGaSqvz3f8/e//x1NTU1ITU1l+IQQhg/5ZciQIUhISEBpaSnmzJnT5atHikaDiFtuQcQttwSpwmsrLy+HVqvF7bff3u3vRf7jYRf5xWKxIDU1FWVlZaivr1e7HL+5XC7s2rULZrMZw4cP58gnhDB8yC+RkZH4wQ9+gOPHj+Po0aNql+O32tpaHDhwAPfccw9MgdxjRt2O4UN+URQFGRkZiIiIQH5+fq94HrqI4LPPPoPD4UBmZqbfPwugnsHwIb+lpKQgLS0NBQUFOHXqlNrlXJeI4MKFC1i9ejUGDhyICRMm8JArxDB8yG96vR5ZWVmora3FunXr4Any426CbevWrTh48CB++MMfIi6Il+4pOBg+5DdFUTBp0iTccccd+PDDD1FdXR2yN2s6nU689957SEhIwI9//GMecoUghg8FxGg04rnnnsOpU6ewfPlyv6dV7Ukejwdr1qxBaWkpnnrqKQwZMkTtkqgDDB8KWGZmJiZPnoyVK1eipKQk5EY/R48exdKlS5GamoqnnnqKo54QxfChgCiKgj59+uDll19GVFQUXn755ZCZIVBEcP78ebz88suor6/Ha6+9BovFonZZdA0MHwqYoigYOXIkXnnlFRw7dgwvvfQSnE6nqgEkImhpacFvfvMbFBUVYd68ecjIyOAVrhDG2yuoU8LCwvCv//qvqKqqwvvvv4/ExET8+te/RnR0dI9/4UUEra2teO+995CXl4epU6fiF7/4BbQBPLSQeh7/dqjTIiMjkZOTg/r6enz44YcAgNdeew2xsbE9FkCXRzzvvvsu3nzzTaSlpeGtt96C0WjkqCfEMXyoS0wmE5YuXQoRwYcffogzZ87gjTfewMCBA7v9yy8iqK+vxxtvvIH/+q//gtVqxfLlyzFgwAAGTy/Acz7UJYqiIDY2Fv/xH/+BZ555BgUFBfjRj36Ezz77rFsvw7e3t6O8vByzZ89GXl4epkyZgg8++IBz9vQmEoDly5fLyJEjJSYmRmJiYiQtLU02bdrkbW9ubpaf//znEhcXJ1FRUTJjxgyx2Ww++zh58qRMmTJFIiMjpW/fvvLCCy9IW1tbIGWIw+EQAOJwOALajrqPx+ORixcvSl5engwYMEDi4+PlhRdekBMnTojH4xGPxxOU9/B4PGK322Xp0qWSlJQkJpNJXn31VTl//nxQ3oO6JpDvZkDhs3HjRiksLJSvvvpKqqqqZPHixaLT6eTQoUMiIvLMM89IUlKSFBcXy4EDByQtLU3uvvtu7/Zut1tGjBgh6enpUl5eLps2bZKEhATJycnptg9IPcfj8Yjb7ZY9e/Z4/4G59dZb5d/+7d/k+PHj0tbW1qmAuLzf2tpa+f3vfy9jx46VyMhISUtLk8LCQmltbWXwhIhuC5+OxMbGygcffCAN
DQ2i0+lk3bp13rajR48KACktLRURkU2bNolGo/EZDeXl5YnBYBCXy3XN92hpaRGHw+FdampqGD4hzOPxSENDg6xcuVLGjRsnffr0kYEDB8qcOXNk/fr1cvLkSbl48eI1w+hy2DQ3N0ttba1s2rRJ5s+fL8OHD5c+ffpIamqqvP3222K32xk6ISaQ8FFEOvfjjPb2dqxbtw5ZWVkoLy+HzWbDxIkTcf78eZ95UwYOHIgFCxbg+eefx5IlS7Bx40ZUVFR420+cOIEhQ4bg4MGDuOOOOzp8r1/+8pf41a9+ddV6h8MBQ5CecEDBJyI4e/YstmzZgtWrV2P//v1obm6GyWTCsGHDkJKSgkGDBqFv376IioqCRqPBxYsXce7cOVRXV+Po0aM4duwYzp07B61Wi5EjR+Kxxx7D9OnT0a9fPz6LKwQ5nU4YjUa/vpsBX+2qrKyE1WpFS0sLoqOjkZ+fj9TUVFRUVCA8PPyqCZvMZjNsNhsAwGazwWw2X9V+ue1acnJysHDhQu9rp9OJpKSkQEunHqYoCvr27YtZs2ZhxowZOH78OEpKSrBz504cPnwYhw8fRktLC9xuN0TEOym9RqNBREQETCYTbr/9dtxzzz24//77cfvttyMmJoYnlG8SAYfPbbfdhoqKCjgcDqxfvx5ZWVkoKSnpjtq89Ho99Hp9t74HdZ/Lt2SMGjUKo0aNwrPPPosLFy6gtrYWdrsdDocDLZeey67X62EwGLxPSTUajdBqtZ16WgaFtoDDJzw8HEOHDgUAjBkzBvv378dvf/tbPPbYY2htbUVDQ4PP6Mdut3vvr7FYLNi3b5/P/ux2u7eNbm6Xw0On0yE2NhaxsbGc1P2fWJcPmj0eD1wuF8aMGQOdTofi4mJvW1VVFaqrq2G1WgEAVqsVlZWVqKur8/YpKiqCwWBAampqV0shol4koJFPTk4OHnroISQnJ6OxsRGrVq3C9u3bsWXLFhiNRsydOxcLFy5EXFwcDAYDnnvuOVitVqSlpQEAJk+ejNTUVMyePRtLly6FzWbDK6+8guzsbB5WEf2TCSh86urq8MQTT+D06dMwGo0YNWoUtmzZgkmTJgEAli1bBo1Gg5kzZ8LlciEjIwPLly/3bh8WFoaCggI8++yzsFqtiIqKQlZWFl5//fXgfioiCnmdvtSupkAu5xFRzwnku8kfShCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpokvh8+abb0JRFCxYsMC7rqWlBdnZ2YiPj0d0dDRmzpwJu93us111dTUyMzPRp08fJCYm4sUXX4Tb7e5KKUTUy3Q6fPbv34///M//xKhRo3zWP//88/jrX/+KdevWoaSkBLW1tZgxY4a3vb29HZmZmWhtbcXu3bvxpz/9CStXrsSSJUs6/ymIqPeRTmhsbJRhw4ZJUVGR3HfffTJ//nwREWloaBCdTifr1q3z9j169KgAkNLSUhER2bRpk2g0GrHZbN4+eXl5YjAYxOVydfh+LS0t4nA4vEtNTY0AEIfD0ZnyiaibOBwOv7+bnRr5ZGdnIzMzE+np6T7ry8rK0NbW5rN++PDhSE5ORmlpKQCgtLQUI0eOhNls9vbJyMiA0+nE4cOHO3y/3NxcGI1G75KUlNSZsokohAQcPmvWrMHBgweRm5t7VZvNZkN4eDhMJpPPerPZDJvN5u3z3eC53H65rSM5OTlwOBzepaamJtCyiSjEaAPpXFNTg/nz56OoqAgRERHdVdNV9Ho99Hp9j70fEXW/gEY+ZWVlqKurw5133gmtVgutVouSkhK8++670Gq1MJvNaG1tRUNDg892
drsdFosFAGCxWK66+nX59eU+RHTzCyh8Jk6ciMrKSlRUVHiXsWPHYtasWd7/1ul0KC4u9m5TVVWF6upqWK1WAIDVakVlZSXq6uq8fYqKimAwGJCamhqkj0VEoS6gw66YmBiMGDHCZ11UVBTi4+O96+fOnYuFCxciLi4OBoMBzz33HKxWK9LS0gAAkydPRmpqKmbPno2lS5fCZrPhlVdeQXZ2Ng+tiP6JBBQ+/li2bBk0Gg1mzpwJl8uFjIwMLF++3NseFhaGgoICPPvss7BarYiKikJWVhZef/31YJdCRCFMERFRu4hAOZ1OGI1GOBwOGAwGtcshoksC+W7y3i4iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUoVW7QI6Q0QAAE6nU+VKiOi7Ln8nL39Hr6dXhs+5c+cAAElJSSpXQkQdaWxshNFovG6fXhk+cXFxAIDq6uobfsBQ43Q6kZSUhJqaGhgMBrXL8Rvr7lm9tW4RQWNjI/r373/Dvr0yfDSab09VGY3GXvUX810Gg6FX1s66e1ZvrNvfAQFPOBORKhg+RKSKXhk+er0er732GvR6vdqlBKy31s66e1ZvrTsQivhzTYyIKMh65ciHiHo/hg8RqYLhQ0SqYPgQkSoYPkSkil4ZPu+//z4GDRqEiIgIjB8/Hvv27VO1nh07dmDq1Kno378/FEXBhg0bfNpFBEuWLEG/fv0QGRmJ9PR0HD9+3KdPfX09Zs2aBYPBAJPJhLlz5+LChQvdWndubi7GjRuHmJgYJCYm4pFHHkFVVZVPn5aWFmRnZyM+Ph7R0dGYOXMm7Ha7T5/q6mpkZmaiT58+SExMxIsvvgi3291tdefl5WHUqFHeX/9arVZs3rw5pGvuyJtvvglFUbBgwYJeV3tQSC+zZs0aCQ8Plz/84Q9y+PBhefrpp8VkMondbletpk2bNsnLL78sH3/8sQCQ/Px8n/Y333xTjEajbNiwQb744guZNm2aDB48WJqbm719HnzwQRk9erTs2bNHdu7cKUOHDpXHH3+8W+vOyMiQP/7xj3Lo0CGpqKiQKVOmSHJysly4cMHb55lnnpGkpCQpLi6WAwcOSFpamtx9993edrfbLSNGjJD09HQpLy+XTZs2SUJCguTk5HRb3Rs3bpTCwkL56quvpKqqShYvXiw6nU4OHToUsjVfad++fTJo0CAZNWqUzJ8/37u+N9QeLL0ufO666y7Jzs72vm5vb5f+/ftLbm6uilX9nyvDx+PxiMVikbfeesu7rqGhQfR6vaxevVpERI4cOSIAZP/+/d4+mzdvFkVR5NSpUz1We11dnQCQkpISb506nU7WrVvn7XP06FEBIKWlpSLybfBqNBqx2WzePnl5eWIwGMTlcvVY7bGxsfLBBx/0ipobGxtl2LBhUlRUJPfdd583fHpD7cHUqw67WltbUVZWhvT0dO86jUaD9PR0lJaWqljZtZ04cQI2m82nZqPRiPHjx3trLi0thclkwtixY7190tPTodFosHfv3h6r1eFwAPi/WQPKysrQ1tbmU/vw4cORnJzsU/vIkSNhNpu9fTIyMuB0OnH48OFur7m9vR1r1qxBU1MTrFZrr6g5OzsbmZmZPjUCvePPO5h61V3tZ8+eRXt7u88fPACYzWYcO3ZMpaquz2azAUCHNV9us9lsSExM9GnXarWIi4vz9uluHo8HCxYswD333IMRI0Z46woPD4fJZLpu7R19tstt3aWyshJWqxUtLS2Ijo5Gfn4+UlNTUVFREbI1A8CaNWtw8OBB7N+//6q2UP7z7g69Knyo+2RnZ+PQ
oUPYtWuX2qX45bbbbkNFRQUcDgfWr1+PrKwslJSUqF3WddXU1GD+/PkoKipCRESE2uWorlcddiUkJCAsLOyqs/92ux0Wi0Wlqq7vcl3Xq9lisaCurs6n3e12o76+vkc+17x581BQUIBt27ZhwIAB3vUWiwWtra1oaGi4bu0dfbbLbd0lPDwcQ4cOxZgxY5Cbm4vRo0fjt7/9bUjXXFZWhrq6Otx5553QarXQarUoKSnBu+++C61WC7PZHLK1d4deFT7h4eEYM2YMiouLves8Hg+Ki4thtVpVrOzaBg8eDIvF4lOz0+nE3r17vTVbrVY0NDSgrKzM22fr1q3weDwYP358t9UmIpg3bx7y8/OxdetWDB482Kd9zJgx0Ol0PrVXVVWhurrap/bKykqf8CwqKoLBYEBqamq31X4lj8cDl8sV0jVPnDgRlZWVqKio8C5jx47FrFmzvP8dqrV3C7XPeAdqzZo1otfrZeXKlXLkyBH56U9/KiaTyefsf09rbGyU8vJyKS8vFwDy9ttvS3l5uZw8eVJEvr3UbjKZ5JNPPpEvv/xSpk+f3uGl9jvuuEP27t0ru3btkmHDhnX7pfZnn31WjEajbN++XU6fPu1dLl686O3zzDPPSHJysmzdulUOHDggVqtVrFart/3ypd/JkydLRUWFfPrpp9K3b99uvfS7aNEiKSkpkRMnTsiXX34pixYtEkVR5G9/+1vI1nwt373a1dtq76peFz4iIu+9954kJydLeHi43HXXXbJnzx5V69m2bZsAuGrJysoSkW8vt7/66qtiNptFr9fLxIkTpaqqymcf586dk8cff1yio6PFYDDInDlzpLGxsVvr7qhmAPLHP/7R26e5uVl+/vOfS2xsrPTp00f+5V/+RU6fPu2zn2+++UYeeughiYyMlISEBPnFL34hbW1t3Vb3U089JQMHDpTw8HDp27evTJw40Rs8oVrztVwZPr2p9q7ifD5EpIpedc6HiG4eDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTx/wHqkVwullAD2wAAAABJRU5ErkJggg=="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第9章-策略梯度算法.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.20"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
